import operator
import re
import sys

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent passed as `headers=` to the requests.get calls
# in this script.
header = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/58.0.3029.96 Safari/537.36"
    ),
}
# Disabled discovery step: everything between the triple quotes below is a
# bare string literal, so none of it is executed.
# As written, it would fetch the reading-list index page, turn every "td a"
# link into a per-type URL, then visit each of those pages and collect the
# first ten ".t-data-grid tbody tr td a" links into `book_url_list`.
# NOTE(review): the hard-coded `book_url_list` further down looks like a saved
# result of this step -- confirm before re-enabling or deleting it.
# NOTE(review): if re-enabled, the loop variable `type` shadows the builtin.
'''
URL = "https://reading.jnu.edu.cn/readlist/20172/"
html = requests.get(URL, headers=header).text
bs = BeautifulSoup(html, "html5lib")
bs_type_list = bs.select("td a")
url_list = []
for type in bs_type_list:
    url_list.append("https://reading.jnu.edu.cn" + type.get("href"))

print(url_list)
book_url_list = []
for url in url_list:
    type_html = requests.get(url.strip(), headers=header).text
    type_bs = BeautifulSoup(type_html, "html5lib")
    book_list = type_bs.select(".t-data-grid tbody tr td a")
    for book in book_list[:10]:
        book_url_list.append("https://reading.jnu.edu.cn" + book.get("href"))

print(book_url_list)

'''

# Reading-list detail-page URLs that the crawl loop in this script iterates
# over, in processing order.
book_url_list = [
    "https://reading.jnu.edu.cn/readlist.detail/K248.09$002f20071?t:ac=20172",
    "https://reading.jnu.edu.cn/readlist.detail/I313.45$002f201479?t:ac=20172",
    "https://reading.jnu.edu.cn/readlist.detail/I247.5$002f200832?t:ac=20172",
    "https://reading.jnu.edu.cn/readlist.detail/I247.53$002f20073.3?t:ac=20172",
    "https://reading.jnu.edu.cn/readlist.detail/I712.45$002f200665?t:ac=20172",
    "https://reading.jnu.edu.cn/readlist.detail/K248.09$002f20071-2?t:ac=20172",
    "https://reading.jnu.edu.cn/readlist.detail/I247.57$002f200234-3.3?t:ac=20172",
    "https://reading.jnu.edu.cn/readlist.detail/I247.5$002f8310$00282$0029-4.2?t:ac=20172",
    "https://reading.jnu.edu.cn/readlist.detail/I247.58$002f9422-3.2?t:ac=20172",
    "https://reading.jnu.edu.cn/readlist.detail/I246.57$002f20091?t:ac=20172",
    "https://reading.jnu.edu.cn/readlist.detail/I775.45$002f20121?t:ac=20172",
    "https://reading.jnu.edu.cn/readlist.detail/I313.45$002f20111.2?t:ac=20172",
    "https://reading.jnu.edu.cn/readlist.detail/G640$002f20109?t:ac=20172",
    "https://reading.jnu.edu.cn/readlist.detail/I246.5$002f8111-2.2?t:ac=20172",
    "https://reading.jnu.edu.cn/readlist.detail/G640$002f201010?t:ac=20172",
    "https://reading.jnu.edu.cn/readlist.detail/I253.7$002f201236?t:ac=20172",
    "https://reading.jnu.edu.cn/readlist.detail/I247.55$002f20111?t:ac=20172",
    "https://reading.jnu.edu.cn/readlist.detail/D922.161$002f20043.2?t:ac=20172",
    "https://reading.jnu.edu.cn/readlist.detail/G645.16$002f20061.3?t:ac=20172",
    "https://reading.jnu.edu.cn/readlist.detail/I253.7$002f20114?t:ac=20172",
    "https://reading.jnu.edu.cn/readlist.detail/I246.7$002f20128?t:ac=20172",
    "https://reading.jnu.edu.cn/readlist.detail/I775.45$002f831$00282$0029-4?t:ac=20172",
    "https://reading.jnu.edu.cn/readlist.detail/I561.45$002f201640?t:ac=20172",
    "https://reading.jnu.edu.cn/readlist.detail/I313.45$002f201091$002f1?t:ac=20172",
    "https://reading.jnu.edu.cn/readlist.detail/I247.5$002f8310$00282$0029-2?t:ac=20172",
]
# Seconds to wait on any single HTTP request; without a timeout, requests
# blocks indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 30


def _fetch_soup(page_url):
    """Download *page_url* and return it parsed with html5lib.

    Returns None (after writing a notice to stderr) when the request fails,
    so the caller can skip that book instead of aborting the whole run.
    """
    try:
        response = requests.get(
            page_url.strip(), headers=header, timeout=REQUEST_TIMEOUT
        )
    except requests.RequestException as exc:
        sys.stderr.write("skip %s: %s\n" % (page_url, exc))
        return None
    return BeautifulSoup(response.text, "html5lib")


# For every reading-list entry, locate its record page and print the ISBN text.
for url in book_url_list:
    book_bs = _fetch_soup(url)
    if book_bs is None:
        continue

    # When the entry page carries a ".briefcitTitle a" link, the record lives
    # behind it on the host below; otherwise the entry page itself is used,
    # and the copy already downloaded is reused rather than fetched again.
    title_links = book_bs.select(".briefcitTitle a")
    if title_links:
        detail_url = "http://202.116.13.244" + title_links[0].get("href")
        book_bs = _fetch_soup(detail_url)
        if book_bs is None:
            continue
    else:
        detail_url = url

    # The value is read from the element following the 15th ".bibInfoEntry td"
    # cell (presumably the ISBN label -- layout assumption, verify against the
    # catalogue HTML). Pages with fewer cells or no following sibling are
    # skipped with a notice instead of raising IndexError/AttributeError.
    info_cells = book_bs.select(".bibInfoEntry td")
    isbn_cell = info_cells[14].find_next_sibling() if len(info_cells) > 14 else None
    if isbn_cell is None:
        sys.stderr.write("skip %s: ISBN cell not found\n" % detail_url)
        continue

    # str() of the children list looks like "['...']"; replace() drops the
    # escaped newlines that the repr contains, and [2:15] skips the leading
    # "['" to keep the next 13 characters.
    book_isbn = str(isbn_cell.contents).replace("\\n", "")
    print(book_isbn[2:15])
    # The full repr is only echoed when it is at most 15 characters long.
    if len(book_isbn) <= 15:
        print(book_isbn)


